U.S. Car Accidents (2016-2023)

Import and Clean Data

if (!exists("accidents_raw")) {
  accidents_raw <- read_csv(here("data", "raw_data", "US_Accidents_March23.csv"), lazy = FALSE)
}

acc <- accidents_raw

# cleanup
acc <- acc %>%
  filter(between(`Temperature(F)`, -60, 130)) %>%
  mutate(
    date_ = date(Start_Time),
    year_ = year(Start_Time),
    month_ = month(Start_Time),
    hour_ = hour(Start_Time),
    `Precipitation(in)` = coalesce(`Precipitation(in)`, 0),
    any_precip = `Precipitation(in)` > 0,
    Weather_Condition = coalesce(Weather_Condition, "Unknown"),
    `Temperature(F)` = coalesce(`Temperature(F)`, mean(`Temperature(F)`, na.rm = T))
  )

# augmentation
state_lookup <- tibble(State  = state.abb, state_name  = str_to_title(state.name))

acc <- acc %>%
  mutate(
    sevg = case_when(
      Severity == 1 ~ "least severe",
      Severity == 2 ~ "less severe",
      Severity == 3 ~ "more severe",
      TRUE ~ "most severe"
    )
  ) %>%
  inner_join(state_lookup, by = "State")

Descriptive Analysis

summary(acc)
##       ID               Source             Severity    
##  Length:7546771     Length:7546771     Min.   :1.000  
##  Class :character   Class :character   1st Qu.:2.000  
##  Mode  :character   Mode  :character   Median :2.000  
##                                        Mean   :2.212  
##                                        3rd Qu.:2.000  
##                                        Max.   :4.000  
##                                                       
##    Start_Time                     End_Time                     Start_Lat    
##  Min.   :2016-01-14 20:18:33   Min.   :2016-02-08 06:37:08   Min.   :24.55  
##  1st Qu.:2018-11-20 16:22:02   1st Qu.:2018-11-20 17:22:44   1st Qu.:33.38  
##  Median :2020-11-10 08:23:39   Median :2020-11-10 15:11:14   Median :35.80  
##  Mean   :2020-06-02 04:07:56   Mean   :2020-06-02 11:34:12   Mean   :36.19  
##  3rd Qu.:2022-01-19 08:15:20   3rd Qu.:2022-01-19 19:01:21   3rd Qu.:40.11  
##  Max.   :2023-03-31 23:30:00   Max.   :2023-03-31 23:59:00   Max.   :49.00  
##                                                                             
##    Start_Lng          End_Lat           End_Lng         Distance(mi)    
##  Min.   :-124.62   Min.   :24.6      Min.   :-124.5    Min.   :  0.000  
##  1st Qu.:-117.22   1st Qu.:33.4      1st Qu.:-117.8    1st Qu.:  0.000  
##  Median : -87.81   Median :36.1      Median : -88.1    Median :  0.028  
##  Mean   : -94.71   Mean   :36.2      Mean   : -95.8    Mean   :  0.558  
##  3rd Qu.: -80.38   3rd Qu.:40.2      3rd Qu.: -80.3    3rd Qu.:  0.460  
##  Max.   : -67.11   Max.   :49.1      Max.   : -67.1    Max.   :441.750  
##                    NA's   :3341777   NA's   :3341777                    
##  Description           Street              City              County         
##  Length:7546771     Length:7546771     Length:7546771     Length:7546771    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     State             Zipcode            Country            Timezone        
##  Length:7546771     Length:7546771     Length:7546771     Length:7546771    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Airport_Code       Weather_Timestamp             Temperature(F)  
##  Length:7546771     Min.   :2016-01-14 19:51:00   Min.   :-58.00  
##  Class :character   1st Qu.:2018-11-20 16:15:00   1st Qu.: 49.00  
##  Mode  :character   Median :2020-11-10 08:30:00   Median : 64.00  
##                     Mean   :2020-06-02 04:08:26   Mean   : 61.67  
##                     3rd Qu.:2022-01-19 07:58:00   3rd Qu.: 76.00  
##                     Max.   :2023-03-31 23:53:00   Max.   :129.20  
##                                                                   
##  Wind_Chill(F)      Humidity(%)      Pressure(in)   Visibility(mi)  
##  Min.   :-80.00    Min.   :  1.00   Min.   : 0.00   Min.   :  0.00  
##  1st Qu.: 43.00    1st Qu.: 48.00   1st Qu.:29.37   1st Qu.: 10.00  
##  Median : 62.00    Median : 67.00   Median :29.86   Median : 10.00  
##  Mean   : 58.25    Mean   : 64.84   Mean   :29.54   Mean   :  9.09  
##  3rd Qu.: 75.00    3rd Qu.: 84.00   3rd Qu.:30.03   3rd Qu.: 10.00  
##  Max.   :128.00    Max.   :100.00   Max.   :58.63   Max.   :140.00  
##  NA's   :1833858   NA's   :10278    NA's   :7904    NA's   :39499   
##  Wind_Direction     Wind_Speed(mph)   Precipitation(in)   Weather_Condition 
##  Length:7546771     Min.   :   0.00   Min.   : 0.000000   Length:7546771    
##  Class :character   1st Qu.:   4.60   1st Qu.: 0.000000   Class :character  
##  Mode  :character   Median :   7.00   Median : 0.000000   Mode  :character  
##                     Mean   :   7.69   Mean   : 0.006127                     
##                     3rd Qu.:  10.40   3rd Qu.: 0.000000                     
##                     Max.   :1087.00   Max.   :36.470000                     
##                     NA's   :429617                                          
##   Amenity           Bump          Crossing        Give_Way      
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:7453817   FALSE:7543317   FALSE:6688257   FALSE:7511266  
##  TRUE :92954     TRUE :3454      TRUE :858514    TRUE :35505    
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##   Junction        No_Exit         Railway        Roundabout     
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:6990004   FALSE:7527521   FALSE:7481985   FALSE:7546527  
##  TRUE :556767    TRUE :19250     TRUE :64786     TRUE :244      
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##   Station           Stop         Traffic_Calming Traffic_Signal 
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:7348460   FALSE:7337406   FALSE:7539342   FALSE:6424853  
##  TRUE :198311    TRUE :209365    TRUE :7429      TRUE :1121918  
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  Turning_Loop    Sunrise_Sunset     Civil_Twilight     Nautical_Twilight 
##  Mode :logical   Length:7546771     Length:7546771     Length:7546771    
##  FALSE:7546771   Class :character   Class :character   Class :character  
##                  Mode  :character   Mode  :character   Mode  :character  
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##  Astronomical_Twilight     date_                year_          month_    
##  Length:7546771        Min.   :2016-01-14   Min.   :2016   Min.   : 1.0  
##  Class :character      1st Qu.:2018-11-20   1st Qu.:2018   1st Qu.: 3.0  
##  Mode  :character      Median :2020-11-10   Median :2020   Median : 7.0  
##                        Mean   :2020-06-01   Mean   :2020   Mean   : 6.7  
##                        3rd Qu.:2022-01-19   3rd Qu.:2022   3rd Qu.:10.0  
##                        Max.   :2023-03-31   Max.   :2023   Max.   :12.0  
##                                                                          
##      hour_       any_precip          sevg            state_name       
##  Min.   : 0.00   Mode :logical   Length:7546771     Length:7546771    
##  1st Qu.: 8.00   FALSE:7016077   Class :character   Class :character  
##  Median :13.00   TRUE :530694    Mode  :character   Mode  :character  
##  Mean   :12.33                                                        
##  3rd Qu.:17.00                                                        
##  Max.   :23.00                                                        
## 
dim(acc)
## [1] 7546771      53
str(acc)
## tibble [7,546,771 × 53] (S3: tbl_df/tbl/data.frame)
##  $ ID                   : chr [1:7546771] "A-1" "A-2" "A-3" "A-4" ...
##  $ Source               : chr [1:7546771] "Source2" "Source2" "Source2" "Source2" ...
##  $ Severity             : num [1:7546771] 3 2 2 3 2 3 2 3 2 3 ...
##  $ Start_Time           : POSIXct[1:7546771], format: "2016-02-08 05:46:00" "2016-02-08 06:07:59" ...
##  $ End_Time             : POSIXct[1:7546771], format: "2016-02-08 11:00:00" "2016-02-08 06:37:59" ...
##  $ Start_Lat            : num [1:7546771] 39.9 39.9 39.1 39.7 39.6 ...
##  $ Start_Lng            : num [1:7546771] -84.1 -82.8 -84 -84.2 -84.2 ...
##  $ End_Lat              : num [1:7546771] NA NA NA NA NA NA NA NA NA NA ...
##  $ End_Lng              : num [1:7546771] NA NA NA NA NA NA NA NA NA NA ...
##  $ Distance(mi)         : num [1:7546771] 0.01 0.01 0.01 0.01 0.01 0.01 0 0.01 0 0.01 ...
##  $ Description          : chr [1:7546771] "Right lane blocked due to accident on I-70 Eastbound at Exit 41 OH-235 State Route 4." "Accident on Brice Rd at Tussing Rd. Expect delays." "Accident on OH-32 State Route 32 Westbound at Dela Palma Rd. Expect delays." "Accident on I-75 Southbound at Exits 52 52B US-35. Expect delays." ...
##  $ Street               : chr [1:7546771] "I-70 E" "Brice Rd" "State Route 32" "I-75 S" ...
##  $ City                 : chr [1:7546771] "Dayton" "Reynoldsburg" "Williamsburg" "Dayton" ...
##  $ County               : chr [1:7546771] "Montgomery" "Franklin" "Clermont" "Montgomery" ...
##  $ State                : chr [1:7546771] "OH" "OH" "OH" "OH" ...
##  $ Zipcode              : chr [1:7546771] "45424" "43068-3402" "45176" "45417" ...
##  $ Country              : chr [1:7546771] "US" "US" "US" "US" ...
##  $ Timezone             : chr [1:7546771] "US/Eastern" "US/Eastern" "US/Eastern" "US/Eastern" ...
##  $ Airport_Code         : chr [1:7546771] "KFFO" "KCMH" "KI69" "KDAY" ...
##  $ Weather_Timestamp    : POSIXct[1:7546771], format: "2016-02-08 05:58:00" "2016-02-08 05:51:00" ...
##  $ Temperature(F)       : num [1:7546771] 36.9 37.9 36 35.1 36 37.9 34 34 33.3 37.4 ...
##  $ Wind_Chill(F)        : num [1:7546771] NA NA 33.3 31 33.3 35.5 31 31 NA 33.8 ...
##  $ Humidity(%)          : num [1:7546771] 91 100 100 96 89 97 100 100 99 100 ...
##  $ Pressure(in)         : num [1:7546771] 29.7 29.6 29.7 29.6 29.6 ...
##  $ Visibility(mi)       : num [1:7546771] 10 10 10 9 6 7 7 7 5 3 ...
##  $ Wind_Direction       : chr [1:7546771] "Calm" "Calm" "SW" "SW" ...
##  $ Wind_Speed(mph)      : num [1:7546771] NA NA 3.5 4.6 3.5 3.5 3.5 3.5 1.2 4.6 ...
##  $ Precipitation(in)    : num [1:7546771] 0.02 0 0 0 0 0.03 0 0 0 0.02 ...
##  $ Weather_Condition    : chr [1:7546771] "Light Rain" "Light Rain" "Overcast" "Mostly Cloudy" ...
##  $ Amenity              : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Bump                 : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Crossing             : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Give_Way             : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Junction             : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ No_Exit              : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Railway              : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Roundabout           : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Station              : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Stop                 : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Traffic_Calming      : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Traffic_Signal       : logi [1:7546771] FALSE FALSE TRUE FALSE TRUE FALSE ...
##  $ Turning_Loop         : logi [1:7546771] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Sunrise_Sunset       : chr [1:7546771] "Night" "Night" "Night" "Night" ...
##  $ Civil_Twilight       : chr [1:7546771] "Night" "Night" "Night" "Day" ...
##  $ Nautical_Twilight    : chr [1:7546771] "Night" "Night" "Day" "Day" ...
##  $ Astronomical_Twilight: chr [1:7546771] "Night" "Day" "Day" "Day" ...
##  $ date_                : Date[1:7546771], format: "2016-02-08" "2016-02-08" ...
##  $ year_                : num [1:7546771] 2016 2016 2016 2016 2016 ...
##  $ month_               : num [1:7546771] 2 2 2 2 2 2 2 2 2 2 ...
##  $ hour_                : int [1:7546771] 5 6 6 7 7 7 7 7 8 8 ...
##  $ any_precip           : logi [1:7546771] TRUE FALSE FALSE FALSE FALSE TRUE ...
##  $ sevg                 : chr [1:7546771] "more severe" "less severe" "less severe" "more severe" ...
##  $ state_name           : chr [1:7546771] "Ohio" "Ohio" "Ohio" "Ohio" ...
head(acc)
# ACCIDENT SEVERITY COUNTS
sev_count <- acc %>%
  count(sevg)
sev_count
p <- ggplot(sev_count, aes(x = sevg, y = n)) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Accident Counts by Severity (2016 - 2023)",
    x = "Severity",
    y = "Number of Accidents"
  )
ggplotly(p)